EA sampling sites
# WD
setwd("~/PhD/London NERC DTP documents/EA data/EnvironmentAgency_EcologyFish_DataDownload_2025-04-02")
# Load data
counts <- read_csv("TraC_Fish_Counts_2025-04-02.csv")
sites <- read_csv("TraC_Fish_Sites_2025-04-02.csv")
# Join count data with site information
counts_clean <- counts %>% left_join(sites, by = "SITE_ID") %>%
mutate(EVENT_DATE = ymd(EVENT_DATE),
EVENT_YEAR = year(EVENT_DATE),
EVENT_MONTH = month(EVENT_DATE),
SEASON = if_else(EVENT_MONTH %in% 4:9, "Summer", "Winter"))
counts_clean <- counts_clean %>%
select(
SITE_ID,
SITE_NAME = SITE_NAME.x,
SITE_PARENT_NAME,
EVENT_DATE,
SPECIES_NAME,
LATIN_NAME,
FISH_COUNT,
TOP_TIER_SITE,
SITE_RANKED_NGR,
SITE_RANKED_EASTING,
SITE_RANKED_NORTHING)
# Convert dates and extract components
counts_clean <- counts_clean %>%
mutate(
EVENT_DATE = ymd(EVENT_DATE),
EVENT_DATE_YEAR = year(EVENT_DATE),
EVENT_MONTH = month(EVENT_DATE),
EVENT_DAY = day(EVENT_DATE))
# Sampling method
counts_clean <- counts_clean %>%
mutate(
METHOD = case_when(
str_detect(str_to_lower(SITE_NAME), "seine") ~ "Seine Net",
str_detect(str_to_lower(SITE_NAME), "beam") ~ "Beam Trawl",
str_detect(str_to_lower(SITE_NAME), "otter") ~ "Otter Trawl",
str_detect(str_to_lower(SITE_NAME), "kick") ~ "Kick Sample",
str_detect(str_to_lower(SITE_NAME), "fyke") ~ "Fyke Net",
str_detect(str_to_lower(SITE_NAME), "push") ~ "Push Net",
str_detect(str_to_lower(SITE_NAME), "hand") ~ "Hand Collection",
str_detect(str_to_lower(SITE_NAME), "gill") ~ "Gill Net",
TRUE ~ "Unknown"))
# Thames only
counts_thames <- counts_clean %>%
filter(str_detect(TOP_TIER_SITE, regex("thames", ignore_case = TRUE)))
# Lat and long
sites_sf <- st_as_sf(counts_thames, coords = c("SITE_RANKED_EASTING", "SITE_RANKED_NORTHING"), crs = 27700) %>%
st_transform(crs = 4326)
coords <- st_coordinates(sites_sf)
sites_sf$lon <- coords[, 1]
sites_sf$lat <- coords[, 2]
# Gears
sites_sf <- sites_sf %>%
mutate(
GEAR_TYPE = case_when(
str_detect(str_to_lower(SITE_NAME), "seine") ~ "Seine Net",
str_detect(str_to_lower(SITE_NAME), "beam") ~ "Beam Trawl",
str_detect(str_to_lower(SITE_NAME), "otter") ~ "Otter Trawl",
str_detect(str_to_lower(SITE_NAME), "kick") ~ "Kick Sample",
str_detect(str_to_lower(SITE_NAME), "fyke") ~ "Fyke Net",
str_detect(str_to_lower(SITE_NAME), "push") ~ "Push Net",
str_detect(str_to_lower(SITE_NAME), "hand") ~ "Hand Collection",
str_detect(str_to_lower(SITE_NAME), "gill") ~ "Gill Net",
TRUE ~ "Unknown"))
# Map
gear_types <- unique(sites_sf$GEAR_TYPE)
gear_pal <- colorFactor(palette = "Set2", domain = gear_types)
leaflet(sites_sf) %>%
addProviderTiles("CartoDB.Positron") %>%
addCircleMarkers(
lng = ~lon, lat = ~lat,
color = ~gear_pal(GEAR_TYPE),
radius = 6,
stroke = TRUE, weight = 1, fillOpacity = 0.9,
popup = ~paste0(
"<b>Site:</b> ", SITE_NAME, "<br>",
"<b>Zone:</b> ", TOP_TIER_SITE, "<br>",
"<b>Gear:</b> ", GEAR_TYPE), group = ~GEAR_TYPE) %>%
addLegend(
position = "bottomright",
pal = gear_pal,
values = ~GEAR_TYPE,
title = "Gear Type",
opacity = 1) %>%
addLayersControl(
overlayGroups = gear_types,
options = layersControlOptions(collapsed = FALSE),
position = "topright")
# RA
gear_totals <- counts_thames %>%
filter(!is.na(METHOD)) %>%
group_by(METHOD) %>%
summarise(
Total_Individuals = sum(FISH_COUNT, na.rm = TRUE),
Total_Species = n_distinct(SPECIES_NAME),
.groups = "drop")
# Get grand total
grand_total <- sum(gear_totals$Total_Individuals)
# Add relative abundance column
gear_totals <- gear_totals %>%
mutate(
Relative_Abundance = round(100 * Total_Individuals / grand_total, 1)
) %>%
arrange(desc(Relative_Abundance)) %>%
mutate(METHOD = factor(METHOD, levels = unique(METHOD)))
ggplot(gear_totals, aes(x = reorder(METHOD, -Relative_Abundance), y = Relative_Abundance)) +
geom_col(fill = "tomato") +
labs(
title = "Relative abundance by sampling method",
x = "Sampling method",
y = "Relative abundance (%)") +
theme_minimal()
# Matrix
gear_species_table <- counts_thames %>%
filter(!is.na(METHOD), !is.na(SPECIES_NAME)) %>%
count(METHOD, SPECIES_NAME) %>%
pivot_wider(names_from = METHOD, values_from = n, values_fill = 0)
gear_pa <- gear_species_table %>%
mutate(across(-SPECIES_NAME, ~ as.integer(. > 0)))
gear_cols <- names(gear_pa)[!names(gear_pa) %in% "SPECIES_NAME"]
gear_unique <- gear_pa %>%
rowwise() %>%
mutate(unique_to = if (sum(c_across(all_of(gear_cols))) == 1) {
gear_cols[which(c_across(all_of(gear_cols)) == 1)]
} else {
NA_character_
}) %>%
ungroup() %>%
filter(!is.na(unique_to))
unique_counts <- gear_unique %>%
count(unique_to, name = "unique_species_count") %>%
arrange(desc(unique_species_count))
ggplot(unique_counts, aes(x = reorder(unique_to, unique_species_count), y = unique_species_count, fill = unique_to)) +
geom_col(show.legend = FALSE) +
coord_flip() +
labs(
title = "Number of species unique to each gear type",
x = "Sampling method", y = "Unique species count") +
theme_minimal()
gear_pa_long <- gear_pa %>%
pivot_longer(cols = -SPECIES_NAME, names_to = "METHOD", values_to = "present")
ggplot(gear_pa_long, aes(x = METHOD, y = SPECIES_NAME, fill = as.factor(present))) +
geom_tile(color = "white") +
scale_fill_manual(values = c("0" = "white", "1" = "steelblue")) +
labs(
title = "Species presence across sampling method",
x = "Sampling method", y = "Species",
fill = "Present") +
theme_minimal() +
theme(axis.text.y = element_text(size = 6))
# Seine net only
counts_thames_filtered <- counts_thames %>%
filter(METHOD == "Seine Net")
# Estuary
diversity_summary <- counts_thames_filtered %>%
group_by(EVENT_DATE_YEAR) %>%
summarise(
Species_Richness = n_distinct(SPECIES_NAME),
Total_Abundance = sum(FISH_COUNT, na.rm = TRUE),
Shannon_Diversity = diversity(table(SPECIES_NAME), index = "shannon"),
Simpson_Diversity = diversity(table(SPECIES_NAME), index = "simpson"),
.groups = "drop") %>%
mutate(
Margalef_Richness = (Species_Richness - 1) / log(Total_Abundance),
Pielou_Evenness = Shannon_Diversity / log(Species_Richness),
EVENT_DATE_YEAR = as.numeric(as.character(EVENT_DATE_YEAR)))
# Reshape
diversity_long <- diversity_summary %>%
pivot_longer(cols = -EVENT_DATE_YEAR, names_to = "Metric", values_to = "Value")
# Raw trends over time
ggplot(diversity_long, aes(x = EVENT_DATE_YEAR, y = Value)) +
geom_line(color = "red", linewidth = 1) +
geom_smooth(method = "loess", se = TRUE, color = "black", linewidth = 1) +
facet_wrap(~ Metric, scales = "free_y") +
labs(
title = "Diversity metrics over time - Thames Estuary (seine net)",
subtitle = "LOESS smoothed trends across all years",
x = "Year", y = "Diversity metric") +
theme_minimal()
# GLMs/LMMs for each metric
glm_richness <- glm(Species_Richness ~ EVENT_DATE_YEAR, data = diversity_summary, family = poisson())
lm_abundance <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = diversity_summary)
lm_shannon <- lm(Shannon_Diversity ~ EVENT_DATE_YEAR, data = diversity_summary)
lm_simpson <- lm(Simpson_Diversity ~ EVENT_DATE_YEAR, data = diversity_summary)
lm_margalef <- lm(Margalef_Richness ~ EVENT_DATE_YEAR, data = diversity_summary)
lm_evenness <- lm(Pielou_Evenness ~ EVENT_DATE_YEAR, data = diversity_summary)
# Extract slope and p-values
summary_table <- function(model, metric_name) {
coef_summary <- summary(model)$coefficients
slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
p_col <- intersect(c("Pr(>|t|)", "Pr(>|z|)"), colnames(coef_summary))
p <- coef_summary["EVENT_DATE_YEAR", p_col[1]]
ci_low <- slope - 1.96 * se
ci_high <- slope + 1.96 * se
sig <- case_when(
p <= 0.001 ~ "***",
p <= 0.01 ~ "**",
p <= 0.05 ~ "*",
TRUE ~ "")
data.frame(
Metric = metric_name,
Slope = slope,
SE = se,
CI_low = ci_low,
CI_high = ci_high,
P_value = round(p, 4),
Significance = sig
)
}
# Summary table
results <- bind_rows(
summary_table(glm_richness, "Species Richness"),
#summary_table(lm_abundance, "Total Abundance"),
summary_table(lm_shannon, "Shannon Diversity"),
summary_table(lm_simpson, "Simpson Diversity"),
summary_table(lm_margalef, "Margalef Richness"),
summary_table(lm_evenness, "Pielou Evenness"))
# Plot
ggplot(results, aes(x = Slope, y = reorder(Metric, Slope))) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
geom_point(size = 3, color = "tomato") +
geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.2, color = "gray40") +
geom_text(aes(label = Significance), hjust = -0.3, vjust = 0.1, size = 5) +
labs(
title = "Trend in diversity metrics over time - Thames Estuary (seine net)",
subtitle = "Slope ± 95% CI | GLM or LM by metric",
x = "Slope (Change per year)",
y = "Diversity metric",
caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
theme_minimal()
# Zone
zone_summary <- counts_thames_filtered %>%
group_by(EVENT_DATE_YEAR, TOP_TIER_SITE, SPECIES_NAME) %>%
summarise(FISH_COUNT = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")
# Wide community matrix for each zone-year
community_matrix <- zone_summary %>%
pivot_wider(names_from = SPECIES_NAME, values_from = FISH_COUNT, values_fill = 0)
# Diversity metrics
diversity_zone_year <- community_matrix %>%
rowwise() %>%
mutate(
Species_Richness = sum(c_across(-c(EVENT_DATE_YEAR, TOP_TIER_SITE)) > 0),
Total_Abundance = sum(c_across(-c(EVENT_DATE_YEAR, TOP_TIER_SITE))),
Shannon_Diversity = diversity(c_across(-c(EVENT_DATE_YEAR, TOP_TIER_SITE)), index = "shannon"),
Simpson_Diversity = diversity(c_across(-c(EVENT_DATE_YEAR, TOP_TIER_SITE)), index = "simpson")) %>%
ungroup() %>%
mutate(
Margalef_Richness = (Species_Richness - 1) / log(Total_Abundance + 1),
Pielou_Evenness = Shannon_Diversity / log(Species_Richness + 1),
EVENT_DATE_YEAR = as.numeric(EVENT_DATE_YEAR))
# Reshape
diversity_long_zone <- diversity_zone_year %>%
pivot_longer(cols = c(Species_Richness, Shannon_Diversity, Simpson_Diversity,
Margalef_Richness, Pielou_Evenness, Total_Abundance),
names_to = "Metric", values_to = "Value")
# Model
zone_model_summary <- function(df) {
if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
}
model <- lm(Value ~ EVENT_DATE_YEAR, data = df)
s <- summary(model)
slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
ci_low <- slope - 1.96 * se
ci_high <- slope + 1.96 * se
sig <- case_when(
p <= 0.001 ~ "***",
p <= 0.01 ~ "**",
p <= 0.05 ~ "*",
TRUE ~ ""
)
tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}
zone_results <- diversity_long_zone %>%
group_by(TOP_TIER_SITE, Metric) %>%
group_modify(~ zone_model_summary(.x)) %>%
ungroup()
# Plot
ggplot(zone_results %>% filter(!is.na(Slope)),
aes(x = Slope, y = TOP_TIER_SITE, color = Significance)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
geom_point(size = 3) +
geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3, color = "gray40") +
facet_wrap(~ Metric, scales = "free_x") +
labs(
title = "Change in diversity metrics over time by zone - Thames Estuary (seine net)",
subtitle = "Slope ± 95% CI | Linear model per zone & metric",
x = "Slope (Change per year)",
y = "Zone",
color = "Significance",
caption = "* p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
theme_minimal() +
theme(strip.text = element_text(face = "bold"))
# Site
diversity_site_year <- counts_thames_filtered %>%
group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
summarise(
Species_Richness = n_distinct(SPECIES_NAME),
Total_Abundance = sum(FISH_COUNT, na.rm = TRUE),
Shannon_Diversity = diversity(table(SPECIES_NAME), index = "shannon"),
Simpson_Diversity = diversity(table(SPECIES_NAME), index = "simpson"),
.groups = "drop") %>%
mutate(
Margalef_Richness = (Species_Richness - 1) / log(Total_Abundance),
Pielou_Evenness = Shannon_Diversity / log(Species_Richness),
EVENT_DATE_YEAR = as.numeric(EVENT_DATE_YEAR))
# Reshape
diversity_long_site <- diversity_site_year %>%
pivot_longer(cols = c(Species_Richness, Shannon_Diversity, Simpson_Diversity,
Margalef_Richness, Pielou_Evenness, Total_Abundance),
names_to = "Metric", values_to = "Value")
# Model
site_model_summary <- function(df) {
empty_result <- tibble(Slope = NA_real_, CI_low = NA_real_, CI_high = NA_real_,
P_value = NA_real_, Significance = NA_character_)
if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Value, na.rm = TRUE) == 0) {
return(empty_result)
}
model <- tryCatch(
lm(Value ~ EVENT_DATE_YEAR, data = df),
error = function(e) return(NULL)
)
if (is.null(model)) return(empty_result)
coef_summary <- summary(model)$coefficients
if (!"EVENT_DATE_YEAR" %in% rownames(coef_summary)) return(empty_result)
slope <- coef_summary["EVENT_DATE_YEAR", "Estimate"]
se <- coef_summary["EVENT_DATE_YEAR", "Std. Error"]
p <- coef_summary["EVENT_DATE_YEAR", "Pr(>|t|)"]
ci_low <- slope - 1.96 * se
ci_high <- slope + 1.96 * se
sig <- case_when(
p <= 0.001 ~ "***",
p <= 0.01 ~ "**",
p <= 0.05 ~ "*",
TRUE ~ ""
)
tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}
# Apply per group
site_results <- diversity_long_site %>%
group_by(SITE_PARENT_NAME, Metric) %>%
group_modify(~ site_model_summary(.x)) %>%
ungroup()
site_results_clean <- site_results %>%
filter(!is.na(Slope))
# Plot
ggplot(site_results_clean, aes(x = Slope, y = reorder(SITE_PARENT_NAME, Slope))) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
geom_point(aes(color = Significance), size = 2) +
geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
facet_wrap(~ Metric, scales = "free_x") +
labs(
title = "Change in diversity metrics over time by site - Thames Estuary (seine net)",
subtitle = "Slope ± 95% CI | Linear model per site & metric",
x = "Slope (Change per year)",
y = "Site",
caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
color = "Significance") +
theme_minimal()
# Site ranking
site_ranked_sig <- site_results_clean %>%
filter(Significance != "") %>% # Keep only significant trends
mutate(
Direction = ifelse(Slope > 0, "Increasing", "Decreasing"),
Strength = abs(Slope)) %>%
group_by(Metric) %>%
arrange(desc(P_value)) %>%
mutate(Rank = row_number()) %>%
ungroup()
site_ranked_sig %>%
arrange(Metric, Rank) %>%
select(Metric, SITE_PARENT_NAME, Slope, Direction, P_value, Significance) %>%
gt(groupname_col = "Metric") %>%
fmt_number(columns = c(Slope, P_value), decimals = 3) %>%
tab_header(
title = "Sites with significant trends per diversity metric (seine net)",
subtitle = "Ranked by magnitude of change") %>%
cols_label(
SITE_PARENT_NAME = "Site",
Slope = "Slope",
Direction = "Trend",
P_value = "p-value",
Significance = "Significance")
| Sites with significant trends per diversity metric (seine net) | ||||
| Ranked by magnitude of change | ||||
| Site | Slope | Trend | p-value | Significance |
|---|---|---|---|---|
| Pielou_Evenness | ||||
| Richmond | −0.001 | Decreasing | 0.029 | * |
| Greenwich | −0.001 | Decreasing | 0.022 | * |
| Shannon_Diversity | ||||
| Mucking | −0.058 | Decreasing | 0.002 | ** |
| Simpson_Diversity | ||||
| West Thurrock | −0.002 | Decreasing | 0.036 | * |
| Species_Richness | ||||
| Greenhithe | 3.000 | Increasing | 0.000 | *** |
| Total_Abundance | ||||
| Greenwich | −6.276 | Decreasing | 0.038 | * |
| Richmond | 23.970 | Increasing | 0.022 | * |
# Estuary
species_trends <- counts_thames_filtered %>%
group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
summarise(Total_Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")
# Fit linear models for each species
species_model_summary <- function(df) {
if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Total_Abundance, na.rm = TRUE) == 0) {
return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
}
model <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = df)
s <- summary(model)
slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
ci_low <- slope - 1.96 * se
ci_high <- slope + 1.96 * se
sig <- case_when(
p <= 0.001 ~ "***",
p <= 0.01 ~ "**",
p <= 0.05 ~ "*",
TRUE ~ ""
)
tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}
# Apply model per species
species_results <- species_trends %>%
group_by(SPECIES_NAME) %>%
group_modify(~ species_model_summary(.x)) %>%
ungroup()
# Filter significant results
species_results_clean <- species_results %>%
filter(!is.na(Slope) & Significance != "")
# Plot
ggplot(species_results_clean, aes(x = Slope, y = reorder(SPECIES_NAME, Slope))) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
geom_point(aes(color = Significance), size = 2) +
geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
labs(
title = "Species-level trends in abundance over time in the Thames Estuary (seine net)",
subtitle = "Slope ± 95% CI | Linear model per species",
x = "Slope (Change in abundance per year)",
y = "Species",
caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001",
color = "Significance") +
theme_minimal()
#Zone
species_zone_trends <- counts_thames_filtered %>%
group_by(EVENT_DATE_YEAR, TOP_TIER_SITE, SPECIES_NAME) %>%
summarise(Total_Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")
#Model
species_model_summary <- function(df) {
if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Total_Abundance, na.rm = TRUE) == 0) {
return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
}
model <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = df)
s <- summary(model)
slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
ci_low <- slope - 1.96 * se
ci_high <- slope + 1.96 * se
sig <- case_when(
p <= 0.001 ~ "***",
p <= 0.01 ~ "**",
p <= 0.05 ~ "*",
TRUE ~ ""
)
tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}
species_zone_results <- species_zone_trends %>%
group_by(TOP_TIER_SITE, SPECIES_NAME) %>%
group_modify(~ species_model_summary(.x)) %>%
ungroup()
species_zone_results_clean <- species_zone_results %>%
filter(!is.na(Slope) & Significance != "")
# Plot
ggplot(species_zone_results_clean, aes(x = Slope, y = reorder(SPECIES_NAME, Slope), color = Significance)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
geom_point(size = 2) +
geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
facet_grid(~ TOP_TIER_SITE, scales = "free_y") +
labs(
title = "Species-level abundance trends by estuary zone (seine net)",
subtitle = "Slopes from per-zone linear models ± 95% CI",
x = "Slope (Change in abundance per year)",
y = "Species",
color = "Significance",
caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
theme_minimal(base_size = 12) +
theme(strip.text = element_text(face = "bold", size = 10))
# Site
species_site_trends <- counts_thames_filtered %>%
group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME, SPECIES_NAME) %>%
summarise(Total_Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop")
# Model
species_model_summary <- function(df) {
if (n_distinct(df$EVENT_DATE_YEAR) < 3 || sd(df$Total_Abundance, na.rm = TRUE) == 0) {
return(tibble(Slope = NA, CI_low = NA, CI_high = NA, P_value = NA, Significance = NA))
}
model <- lm(Total_Abundance ~ EVENT_DATE_YEAR, data = df)
s <- summary(model)
slope <- coef(s)["EVENT_DATE_YEAR", "Estimate"]
se <- coef(s)["EVENT_DATE_YEAR", "Std. Error"]
p <- coef(s)["EVENT_DATE_YEAR", "Pr(>|t|)"]
ci_low <- slope - 1.96 * se
ci_high <- slope + 1.96 * se
sig <- case_when(
p <= 0.001 ~ "***",
p <= 0.01 ~ "**",
p <= 0.05 ~ "*",
TRUE ~ ""
)
tibble(Slope = slope, CI_low = ci_low, CI_high = ci_high, P_value = p, Significance = sig)
}
species_site_results <- species_site_trends %>%
group_by(SITE_PARENT_NAME, SPECIES_NAME) %>%
group_modify(~ species_model_summary(.x)) %>%
ungroup()
species_site_results_clean <- species_site_results %>%
filter(!is.na(Slope) & Significance != "")
# Plot
ggplot(species_site_results_clean,
aes(x = Slope, y = reorder(SPECIES_NAME, Slope), color = Significance)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray70") +
geom_point(size = 2) +
geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.3) +
facet_wrap(~ SITE_PARENT_NAME, scales = "free_y", nrow = 5) +
labs(
title = "Species-level abundance trends by site (seine net)",
subtitle = "Slopes from per-site linear models ± 95% CI",
x = "Slope (Change in abundance per year)",
y = "Species",
color = "Significance",
caption = "Significance: * p ≤ 0.05 | ** p ≤ 0.01 | *** p ≤ 0.001") +
theme_minimal(base_size = 12) +
theme(
strip.text = element_text(face = "bold", size = 10),
axis.text.y = element_text(size = 6))
# Filter
fish_clean <- counts_thames_filtered %>%
filter(!is.na(SPECIES_NAME), !is.na(FISH_COUNT))
# Presence/absence
fish_clean <- fish_clean %>%
mutate(PA = as.numeric(FISH_COUNT > 0))
# Summarise
year_species_pa <- fish_clean %>%
group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
summarise(present = as.numeric(any(PA == 1)), .groups = "drop") %>%
pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0) %>%
column_to_rownames("EVENT_DATE_YEAR")
# Matrix
year_species_pa <- as.data.frame(year_species_pa)
# betapart core object
beta_core <- betapart.core(year_species_pa)
# Sørensen-based beta diversity components
beta_res <- beta.pair(beta_core, index.family = "sor")
# Extract matrices
sor_mat <- as.matrix(beta_res$beta.sor)
sim_mat <- as.matrix(beta_res$beta.sim)
sne_mat <- as.matrix(beta_res$beta.sne)
# Extract years in chronological order
years <- sort(as.numeric(rownames(sor_mat)))
# Create a data frame
beta_trends <- tibble(
Year1 = years[-length(years)],
Year2 = years[-1],
beta_sor = map2_dbl(Year1, Year2, ~ sor_mat[as.character(.x), as.character(.y)]),
beta_sim = map2_dbl(Year1, Year2, ~ sim_mat[as.character(.x), as.character(.y)]),
beta_sne = map2_dbl(Year1, Year2, ~ sne_mat[as.character(.x), as.character(.y)]))
# Linear models by component
lm_sor <- lm(beta_sor ~ Year2, data = beta_trends)
lm_sim <- lm(beta_sim ~ Year2, data = beta_trends)
lm_sne <- lm(beta_sne ~ Year2, data = beta_trends)
# Extract R² and p-values
get_lm_stats <- function(model) {
summary_model <- summary(model)
r2 <- round(summary_model$r.squared, 3)
p <- round(summary_model$coefficients[2, 4], 3)
return(list(r2 = r2, p = p))
}
sor_stats <- get_lm_stats(lm_sor)
sim_stats <- get_lm_stats(lm_sim)
sne_stats <- get_lm_stats(lm_sne)
beta_trends_long <- beta_trends %>%
pivot_longer(cols = c(beta_sor, beta_sim, beta_sne),
names_to = "Component",
values_to = "Dissimilarity")
beta_trends_long <- beta_trends_long %>%
mutate(Component = recode(Component,
beta_sor = "Sorensen",
beta_sim = "Turnover",
beta_sne = "Nestedness"))
# R² and p-values
subtitle_text <- paste0(
"Sorensen: R² = ", sor_stats$r2, ", p = ", sor_stats$p, " | ",
"Turnover: R² = ", sim_stats$r2, ", p = ", sim_stats$p, " | ",
"Nestedness: R² = ", sne_stats$r2, ", p = ", sne_stats$p)
# Plot
ggplot(beta_trends_long, aes(x = Year2, y = Dissimilarity, color = Component)) +
geom_line(size = 1) +
geom_point(size = 2) +
scale_color_manual(values = c("Sorensen" = "darkorchid",
"Turnover" = "steelblue",
"Nestedness" = "darkorange")) +
labs(
title = "Year-to-year change in beta diversity components in the Thames Estuary (seine net)",
subtitle = subtitle_text,
x = "Year", y = "Dissimilarity",
color = "Component") +
theme_minimal()
# Zone
fish_clean <- counts_thames_filtered %>%
filter(!is.na(SPECIES_NAME), !is.na(FISH_COUNT)) %>%
mutate(PA = as.numeric(FISH_COUNT > 0))
zone_list <- unique(fish_clean$TOP_TIER_SITE)
all_beta_trends <- list()
# Model
for (zone in zone_list) {
cat("Processing zone:", zone, "\n")
zone_data <- fish_clean %>%
filter(TOP_TIER_SITE == zone)
zone_pa <- zone_data %>%
group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
summarise(present = as.numeric(any(PA == 1)), .groups = "drop") %>%
pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0)
if (nrow(zone_pa) >= 3) {
zone_matrix <- zone_pa %>%
column_to_rownames("EVENT_DATE_YEAR") %>%
as.data.frame()
if (!all(rowSums(zone_matrix) == 0)) {
beta_core <- betapart.core(zone_matrix)
beta_res <- beta.pair(beta_core, index.family = "sor")
sor_mat <- as.matrix(beta_res$beta.sor)
sim_mat <- as.matrix(beta_res$beta.sim)
sne_mat <- as.matrix(beta_res$beta.sne)
years <- sort(as.numeric(rownames(sor_mat)))
# Get year-to-year dissimilarities
beta_trends <- tibble(
Year1 = years[-length(years)],
Year2 = years[-1],
beta_sor = map2_dbl(Year1, Year2, ~ sor_mat[as.character(.x), as.character(.y)]),
beta_sim = map2_dbl(Year1, Year2, ~ sim_mat[as.character(.x), as.character(.y)]),
beta_sne = map2_dbl(Year1, Year2, ~ sne_mat[as.character(.x), as.character(.y)])
) %>%
pivot_longer(cols = c(beta_sor, beta_sim, beta_sne),
names_to = "Component", values_to = "Dissimilarity") %>%
mutate(TOP_TIER_SITE = zone)
all_beta_trends[[zone]] <- beta_trends
}
}
}
# Combine results
beta_trends_all <- bind_rows(all_beta_trends)
# Clean component labels
beta_trends_all <- beta_trends_all %>%
mutate(Component = recode(Component,
beta_sor = "Sorensen",
beta_sim = "Turnover",
beta_sne = "Nestedness"))
# R² and p-values
lm_stats_list <- beta_trends_all %>%
group_by(TOP_TIER_SITE, Component) %>%
summarise(
lm_model = list(lm(Dissimilarity ~ Year2)),
.groups = "drop") %>%
mutate(
summary = map(lm_model, summary),
r2 = map_dbl(summary, ~ round(.x$r.squared, 3)),
p = map_dbl(summary, ~ round(.x$coefficients[2, 4], 3))) %>%
select(TOP_TIER_SITE, Component, r2, p)
facet_grid(rows = vars(TOP_TIER_SITE))
beta_trends_all <- beta_trends_all %>%
mutate(
TOP_TIER_ZONE = factor(TOP_TIER_SITE, levels = c("Thames Upper", "Thames Middle", "Thames Lower")),
)
# Subtitle
zone_order <- c("Thames Upper", "Thames Middle", "Thames Lower")
subtitle_text <- lm_stats_list %>%
mutate(
label = paste0(
"<span style='color:",
case_when(Component == "Sorensen" ~ "darkorchid",
Component == "Turnover" ~ "steelblue",
Component == "Nestedness" ~ "darkorange"),
"'>", Component, "</span>: R² = ", r2, ", p = ", p)) %>%
group_by(TOP_TIER_SITE) %>%
summarise(text = paste(label, collapse = " | "), .groups = "drop") %>%
mutate(TOP_TIER_SITE = factor(TOP_TIER_SITE, levels = zone_order)) %>%
arrange(TOP_TIER_SITE) %>%
summarise(subtitle = paste(text, collapse = "<br><br>")) %>%
pull(subtitle)
# Plot
ggplot(beta_trends_all, aes(x = Year2, y = Dissimilarity, color = Component)) +
geom_line(size = 1) +
geom_point(size = 1.5) +
facet_grid(rows = vars(TOP_TIER_ZONE), scales = "free_y") +
scale_color_manual(values = c("Sorensen" = "darkorchid",
"Turnover" = "steelblue",
"Nestedness" = "darkorange")) +
labs(
title = "Year-to-year beta diversity trends by zone (seine net)",
subtitle = subtitle_text,
x = "Year", y = "Dissimilarity",
color = "Component") +
theme_minimal() +
theme(
plot.subtitle = ggtext::element_markdown(size = 10),
strip.text = element_text(face = "bold", size = 10),
axis.text.x = element_text(angle = 45, hjust = 1))
# Site
fish_clean <- counts_thames_filtered %>%
filter(!is.na(SPECIES_NAME), !is.na(FISH_COUNT)) %>%
mutate(PA = as.numeric(FISH_COUNT > 0))
site_list <- unique(fish_clean$SITE_PARENT_NAME)
site_beta_trends <- list()
# Model
for (site in site_list) {
cat("Processing site:", site, "\n")
site_data <- fish_clean %>%
filter(SITE_PARENT_NAME == site)
site_pa <- site_data %>%
group_by(EVENT_DATE_YEAR, SPECIES_NAME) %>%
summarise(present = as.numeric(any(PA == 1)), .groups = "drop") %>%
pivot_wider(names_from = SPECIES_NAME, values_from = present, values_fill = 0)
if (nrow(site_pa) >= 3) {
site_matrix <- site_pa %>%
column_to_rownames("EVENT_DATE_YEAR") %>%
as.data.frame()
if (!all(rowSums(site_matrix) == 0)) {
beta_core <- betapart.core(site_matrix)
beta_res <- beta.pair(beta_core, index.family = "sor")
sor_mat <- as.matrix(beta_res$beta.sor)
sim_mat <- as.matrix(beta_res$beta.sim)
sne_mat <- as.matrix(beta_res$beta.sne)
years <- sort(as.numeric(rownames(sor_mat)))
beta_trends <- tibble(
Year1 = years[-length(years)],
Year2 = years[-1],
beta_sor = map2_dbl(Year1, Year2, ~ sor_mat[as.character(.x), as.character(.y)]),
beta_sim = map2_dbl(Year1, Year2, ~ sim_mat[as.character(.x), as.character(.y)]),
beta_sne = map2_dbl(Year1, Year2, ~ sne_mat[as.character(.x), as.character(.y)])
) %>%
pivot_longer(cols = c(beta_sor, beta_sim, beta_sne),
names_to = "Component", values_to = "Dissimilarity") %>%
mutate(SITE_PARENT_NAME = site)
site_beta_trends[[site]] <- beta_trends
}
}
}
beta_trends_site <- bind_rows(site_beta_trends) %>%
mutate(Component = recode(Component,
beta_sor = "Sorensen",
beta_sim = "Turnover",
beta_sne = "Nestedness"))
# R² and p-values
lm_stats_list <- beta_trends_site %>%
group_by(SITE_PARENT_NAME, Component) %>%
summarise(lm_model = list(lm(Dissimilarity ~ Year2)), .groups = "drop") %>%
mutate(
summary = map(lm_model, summary),
r2 = map_dbl(summary, ~ round(.x$r.squared, 3)),
p = map_dbl(summary, ~ round(.x$coefficients[2, 4], 3))
) %>%
select(SITE_PARENT_NAME, Component, r2, p)
ggplot(beta_trends_site, aes(x = Year2, y = Dissimilarity, color = Component)) +
geom_line(size = 1) +
geom_point(size = 1.5) +
facet_wrap(~ SITE_PARENT_NAME, scales = "free_y", ncol = 4) +
scale_color_manual(values = c("Sorensen" = "darkorchid",
"Turnover" = "steelblue",
"Nestedness" = "darkorange")) +
labs(
title = "Year-to-year beta diversity trends by site (seine net)",
x = "Year", y = "Dissimilarity",
color = "Component") +
theme_minimal() +
theme(
strip.text = element_text(face = "bold", size = 9),
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom")
# Rank and table
lm_stats_list_site <- beta_trends_site %>%
group_by(SITE_PARENT_NAME, Component) %>%
summarise(
lm_model = list(lm(Dissimilarity ~ Year2)),
.groups = "drop") %>%
mutate(
summary = map(lm_model, summary),
slope = map_dbl(summary, ~ coef(.x)["Year2", "Estimate"]),
r2 = map_dbl(summary, ~ round(.x$r.squared, 3)),
p = map_dbl(summary, ~ round(coef(.x)["Year2", "Pr(>|t|)"], 3)),
Significant = p <= 0.05,
Direction = case_when(
Significant & slope > 0 ~ "Increasing",
Significant & slope < 0 ~ "Decreasing",
TRUE ~ "Stable")) %>%
select(SITE_PARENT_NAME, Component, slope, r2, p, Significant, Direction)
beta_trend_summary_site <- lm_stats_list_site %>%
arrange(Component, Direction, SITE_PARENT_NAME)
beta_trend_summary_site_sig <- beta_trend_summary_site %>%
filter(Significant == TRUE)
beta_trend_summary_site_sig %>%
gt(groupname_col = "Component") %>%
fmt_number(columns = c(slope, r2, p), decimals = 3) %>%
tab_header(
title = "Significant beta diversity trends by site (seine net)",
subtitle = "Only sites with p ≤ 0.05 for Sorensen, Turnover, or Nestedness components") %>%
cols_label(
SITE_PARENT_NAME = "Site",
slope = "Slope",
r2 = "R²",
p = "p-value",
Direction = "Trend") %>%
tab_options(
table.font.size = "small",
heading.title.font.size = 14,
heading.subtitle.font.size = 11,
row.striping.include_table_body = TRUE)
# Identify dominant species per site-year
dominant_species <- counts_thames_filtered %>%
group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME, SPECIES_NAME) %>%
summarise(Abundance = sum(FISH_COUNT, na.rm = TRUE), .groups = "drop") %>%
group_by(EVENT_DATE_YEAR, SITE_PARENT_NAME) %>%
mutate(Proportion = Abundance / sum(Abundance)) %>%
filter(Proportion == max(Proportion))
# Max dominance index per site-year
dominance_index <- dominant_species %>%
select(EVENT_DATE_YEAR, SITE_PARENT_NAME, Max_Dominance = Proportion)
# Diversity metrics
diversity_metrics_site <- diversity_long_site %>%
filter(Metric %in% c("Pielou_Evenness", "Species_Richness")) %>%
pivot_wider(names_from = Metric, values_from = Value)
# Combine with dominant species
dominance_combined <- diversity_metrics_site %>%
left_join(dominance_index, by = c("EVENT_DATE_YEAR", "SITE_PARENT_NAME")) %>%
left_join(dominant_species %>% select(EVENT_DATE_YEAR, SITE_PARENT_NAME, SPECIES_NAME),
by = c("EVENT_DATE_YEAR", "SITE_PARENT_NAME"))
# Linear models
lm_evenness <- lm(Pielou_Evenness ~ Max_Dominance + SPECIES_NAME, data = dominance_combined)
lm_richness <- lm(Species_Richness ~ Max_Dominance + SPECIES_NAME, data = dominance_combined)
# Extract significant species
extract_significant_species <- function(model) {
summary(model)$coefficients %>%
as.data.frame() %>%
rownames_to_column("Term") %>%
filter(grepl("SPECIES_NAME", Term) & `Pr(>|t|)` < 0.05) %>%
mutate(SPECIES_NAME = gsub("SPECIES_NAME", "", Term)) %>%
pull(SPECIES_NAME)
}
# Significant species per metric
sig_species_evenness <- extract_significant_species(lm_evenness)
sig_species_richness <- extract_significant_species(lm_richness)
sig_species_all <- union(sig_species_evenness, sig_species_richness)
# Sites where these species were dominant
sig_species_sites <- dominance_combined %>%
filter(SPECIES_NAME %in% sig_species_all) %>%
mutate(
Affects = case_when(
SPECIES_NAME %in% sig_species_evenness & SPECIES_NAME %in% sig_species_richness ~ "Both",
SPECIES_NAME %in% sig_species_evenness ~ "Evenness",
SPECIES_NAME %in% sig_species_richness ~ "Richness",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(Affects))
# Format
sig_species_plot_data <- sig_species_sites %>%
pivot_longer(cols = c(Pielou_Evenness, Species_Richness),
names_to = "Metric", values_to = "Value") %>%
filter(
(Metric == "Pielou_Evenness" & Affects %in% c("Evenness", "Both")) |
(Metric == "Species_Richness" & Affects %in% c("Richness", "Both"))
) %>%
mutate(
Affects = factor(Affects, levels = c("Richness", "Evenness", "Both")),
Metric = recode(Metric,
"Pielou_Evenness" = "Evenness",
"Species_Richness" = "Richness")
)
# Plot
ggplot(sig_species_plot_data,
aes(x = EVENT_DATE_YEAR, y = reorder(SITE_PARENT_NAME, EVENT_DATE_YEAR))) +
geom_point(aes(size = Max_Dominance, color = Affects), alpha = 0.8) +
scale_color_manual(values = c("Richness" = "steelblue", "Evenness" = "firebrick", "Both" = "purple")) +
facet_grid(Metric ~ SPECIES_NAME, scales = "free", switch = "x") +
labs(
title = "Sites dominated by species significantly affecting diversity metrics (seine net)",
subtitle = "Point size = dominance level | Color = affected metric(s)",
x = "Year",
y = "Site",
color = "Affects",
size = "Dominance") +
theme_minimal(base_size = 12) +
theme(
axis.text.y = element_text(size = 7),
axis.text.x = element_text(angle = 45, hjust = 1),
strip.text = element_text(face = "bold", size = 10),
strip.placement = "outside",
panel.spacing = unit(1, "lines"),
legend.position = "bottom")
EA sampling sites
# Load data
setwd("~/PhD/London NERC DTP documents/EA data/EnvironmentAgency_EcologyFish_DataDownload_2025-04-02")
lengths <- read_csv("TraC_Fish_Individual_Lengths_2025-04-02.csv")
sites <- read_csv("TraC_Fish_Sites_2025-04-02.csv")
lengths_clean <- lengths %>% left_join(sites, by = "SITE_ID")
lengths_clean <- lengths_clean %>%
select(
SITE_ID,
SITE_NAME = SITE_NAME.x,
SITE_PARENT_NAME,
EVENT_DATE,
SPECIES_NAME,
LATIN_NAME,
FISH_LENGTH,
TOP_TIER_SITE,
SITE_RANKED_NGR,
SITE_RANKED_EASTING,
SITE_RANKED_NORTHING)
# Convert dates and extract components
lengths_clean <- lengths_clean %>%
mutate(
EVENT_DATE = ymd(EVENT_DATE),
EVENT_YEAR = year(EVENT_DATE),
EVENT_MONTH = month(EVENT_DATE),
EVENT_DAY = day(EVENT_DATE))
# Sampling method
lengths_clean <- lengths_clean %>%
mutate(
METHOD = case_when(
str_detect(str_to_lower(SITE_NAME), "seine") ~ "Seine Net",
str_detect(str_to_lower(SITE_NAME), "beam") ~ "Beam Trawl",
str_detect(str_to_lower(SITE_NAME), "otter") ~ "Otter Trawl",
str_detect(str_to_lower(SITE_NAME), "kick") ~ "Kick Sample",
str_detect(str_to_lower(SITE_NAME), "fyke") ~ "Fyke Net",
str_detect(str_to_lower(SITE_NAME), "push") ~ "Push Net",
str_detect(str_to_lower(SITE_NAME), "hand") ~ "Hand Collection",
str_detect(str_to_lower(SITE_NAME), "gill") ~ "Gill Net",
TRUE ~ "Unknown"))
# Thames only
lengths_thames <- lengths_clean %>%
filter(str_detect(TOP_TIER_SITE, regex("thames", ignore_case = TRUE)))
# Lat and long
sites_sf <- st_as_sf(lengths_thames, coords = c("SITE_RANKED_EASTING", "SITE_RANKED_NORTHING"), crs = 27700) %>%
st_transform(crs = 4326)
coords <- st_coordinates(sites_sf)
sites_sf$lon <- coords[, 1]
sites_sf$lat <- coords[, 2]
# Gears
sites_sf <- sites_sf %>%
mutate(
GEAR_TYPE = case_when(
str_detect(str_to_lower(SITE_NAME), "seine") ~ "Seine Net",
str_detect(str_to_lower(SITE_NAME), "beam") ~ "Beam Trawl",
str_detect(str_to_lower(SITE_NAME), "otter") ~ "Otter Trawl",
str_detect(str_to_lower(SITE_NAME), "kick") ~ "Kick Sample",
str_detect(str_to_lower(SITE_NAME), "fyke") ~ "Fyke Net",
str_detect(str_to_lower(SITE_NAME), "push") ~ "Push Net",
str_detect(str_to_lower(SITE_NAME), "hand") ~ "Hand Collection",
str_detect(str_to_lower(SITE_NAME), "gill") ~ "Gill Net",
TRUE ~ "Unknown"))
# Map
gear_types <- unique(sites_sf$GEAR_TYPE)
gear_pal <- colorFactor(palette = "Set2", domain = gear_types)
leaflet(sites_sf) %>%
addProviderTiles("CartoDB.Positron") %>%
addCircleMarkers(
lng = ~lon, lat = ~lat,
color = ~gear_pal(GEAR_TYPE),
radius = 6,
stroke = TRUE, weight = 1, fillOpacity = 0.9,
popup = ~paste0(
"<b>Site:</b> ", SITE_NAME, "<br>",
"<b>Zone:</b> ", TOP_TIER_SITE, "<br>",
"<b>Gear:</b> ", GEAR_TYPE), group = ~GEAR_TYPE) %>%
addLegend(
position = "bottomright",
pal = gear_pal,
values = ~GEAR_TYPE,
title = "Gear Type",
opacity = 1) %>%
addLayersControl(
overlayGroups = gear_types,
options = layersControlOptions(collapsed = FALSE),
position = "topright")